# Read the data set; show_col_types = FALSE is passed to read_csv().
tri_data <- read_csv("../data/tri_data.csv", show_col_types = FALSE)

# Visualise the first 10,000 rows with vis_dat().
tri_data %>%
  slice_head(n = 10000) %>%
  vis_dat()

# Same 10,000-row sample, passed to vis_miss().
tri_data %>%
  slice_head(n = 10000) %>%
  vis_miss()
Introduction to the dataset and summary statistics. Introduce the primary research question: what is the relationship between industry sector and facility location, pollution, and chemical use, and has this relationship shifted over the past 10 years?
Location of facilities, location trends, tribal lands, and proximity. Question: how many facilities are located on tribal land, and is there a significant difference in the type and amount of chemical release compared with facilities not on tribal land?
# Distinct combinations of the facility name, address and coordinate columns,
# with a label telling whether the row has a non-missing tribal_land value.
facility_location <- tri_data %>%
  select(
    facility_name, street_address, city, county, state, zip_code,
    bia_code, tribal_land, latitude, longitude
  ) %>%
  distinct() %>%
  mutate(
    tribal_yes_no = if_else(
      is.na(tribal_land),
      "Not Tribal Land",
      "Tribal Land"
    )
  )
# Number of distinct facility names in the data (one-row tibble, column n).
tri_data %>%
  summarise(n = n_distinct(facility_name))
## # A tibble: 1 × 1
## n
## <int>
## 1 26267
# Distinct facility names under each (standard_parent_co_name, parent_co_name)
# pair, largest first. summarise() drops only the last grouping level, so the
# result stays grouped by standard_parent_co_name; passing `.groups` to
# summarise() would control that and silence the message.
tri_data %>%
  group_by(standard_parent_co_name, parent_co_name) %>%
  summarise(number_of_facilities = n_distinct(facility_name)) %>%
  arrange(desc(number_of_facilities))
## `summarise()` has grouped output by 'standard_parent_co_name'. You can override
## using the `.groups` argument.
## # A tibble: 7,925 × 3
## # Groups: standard_parent_co_name [5,483]
## standard_parent_co_name parent_co_name number_of_facilities
## <chr> <chr> <int>
## 1 <NA> <NA> 6433
## 2 US DEPARTMENT OF DEFENSE US DEPARTMENT OF DEFENSE 324
## 3 CRH AMERICAS INC CRH AMERICAS INC 252
## 4 CEMEX INC CEMEX INC 208
## 5 ARGOS USA CORP ARGOS USA CORP 186
## 6 BERKSHIRE HATHAWAY INC BERKSHIRE HATHAWAY INC 186
## 7 CLEAN HARBORS INC CLEAN HARBORS INC 136
## 8 KOCH INDUSTRIES INC KOCH INDUSTRIES INC 121
## 9 MARTIN MARIETTA MATERIALS INC MARTIN MARIETTA MATERIALS… 118
## 10 TYSON FOODS INC TYSON FOODS INC 110
## # ℹ 7,915 more rows
# Clustered leaflet map with one circle marker per distinct facility record.
# The popup text is assembled as a column first and then referenced by name.
tri_data %>%
  distinct(
    longitude, latitude, facility_name, parent_co_name,
    industry_sector, city, state
  ) %>%
  mutate(
    popup_html = paste0(
      facility_name, "<br>", parent_co_name, "<br>",
      industry_sector, "<br>", city, ", ", state
    )
  ) %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~longitude,
    lat = ~latitude,
    popup = ~popup_html,
    clusterOptions = markerClusterOptions()
  )
## Warning in validateCoords(lng, lat, funcName): Data contains 2 rows with either
## missing or invalid lat/lon values and will be ignored
# Percentage of facility_location rows in each tribal_yes_no category.
facility_location %>%
  count(tribal_yes_no, name = "records") %>%
  mutate(percent = 100 * records / nrow(facility_location)) %>%
  select(-records)
## # A tibble: 2 × 2
## tribal_yes_no percent
## <chr> <dbl>
## 1 Not Tribal Land 99.7
## 2 Tribal Land 0.311
Industry sector trends (location, chemical use, pollution) and changes in industry sector prevalence. Question: is there a correlation between industry sector and the frequency of carcinogen, PFAS, and PBT use?
# Distinct combinations of sector, state, chemical flags and the release /
# containment columns.
industry_info <- tri_data %>%
  select(
    industry_sector, state, clean_air_act_chemical, carcinogen,
    metal_category, pbt, pfas, on_site_release_total,
    off_site_release_total, on_site_contained, off_site_contain
  ) %>%
  distinct()
#tri_data %>%
# select(industry_sector) %>%
# group_by(industry_sector) %>%
# summarise(percent = 100 * n()/nrow(tri_data)) %>%
# arrange(desc(percent))
# Stacked bar chart of record counts per state (ordered by frequency via
# fct_infreq), with bars split by industry_sector.
ggplot(tri_data, aes(y = fct_infreq(state), fill = industry_sector)) +
  geom_bar(color = "white", na.rm = TRUE)
Chemical information (type, category, etc.) and trends in chemical use and disposal. Question: is there a correlation between location and the frequency of use of Clean Air Act chemicals?
# Distinct combinations of chemical name and its classification flags,
# sorted by chemical name.
chemical_info <- tri_data %>%
  select(
    chemical, elemental_metal_included, clean_air_act_chemical,
    metal, metal_category, carcinogen, pbt, pfas
  ) %>%
  distinct() %>%
  arrange(chemical)
# Percentage of tri_data rows for each chemical, largest share first.
tri_data %>%
  count(chemical, name = "records") %>%
  mutate(percent = 100 * records / nrow(tri_data)) %>%
  select(-records) %>%
  arrange(desc(percent))
## # A tibble: 627 × 2
## chemical percent
## <chr> <dbl>
## 1 Lead 5.25
## 2 Lead compounds 3.91
## 3 Zinc compounds 3.79
## 4 Nickel 3.17
## 5 Copper 3.09
## 6 Chromium 3.05
## 7 Ammonia 2.88
## 8 Manganese 2.85
## 9 Nitrate compounds (water dissociable; reportable only when in aqueou… 2.76
## 10 Methanol 2.70
## # ℹ 617 more rows
# Percentage of tri_data rows for each elemental_metal_included value,
# largest share first.
tri_data %>%
  count(elemental_metal_included, name = "records") %>%
  mutate(percent = 100 * records / nrow(tri_data)) %>%
  select(-records) %>%
  arrange(desc(percent))
## # A tibble: 2 × 2
## elemental_metal_included percent
## <chr> <dbl>
## 1 NO 97.6
## 2 YES 2.37
# Percentage of tri_data rows for each clean_air_act_chemical value,
# largest share first.
tri_data %>%
  count(clean_air_act_chemical, name = "records") %>%
  mutate(percent = 100 * records / nrow(tri_data)) %>%
  select(-records) %>%
  arrange(desc(percent))
## # A tibble: 2 × 2
## clean_air_act_chemical percent
## <chr> <dbl>
## 1 YES 62.6
## 2 NO 37.4
# Percentage of tri_data rows for each metal value, largest share first.
tri_data %>%
  count(metal, name = "records") %>%
  mutate(percent = 100 * records / nrow(tri_data)) %>%
  select(-records) %>%
  arrange(desc(percent))
## # A tibble: 2 × 2
## metal percent
## <chr> <dbl>
## 1 NO 61.7
## 2 YES 38.3
# Percentage of tri_data rows for each metal_category, largest share first.
tri_data %>%
  count(metal_category, name = "records") %>%
  mutate(percent = 100 * records / nrow(tri_data)) %>%
  select(-records) %>%
  arrange(desc(percent))
## # A tibble: 6 × 2
## metal_category percent
## <chr> <dbl>
## 1 Non_Metal 53.9
## 2 Metal complound categories 20.6
## 3 Elemental metals 18.7
## 4 May contain metal 4.95
## 5 Individually-listed compounds that contain metal 1.07
## 6 Metals with qualifiers 0.733
# Percentage of tri_data rows for each carcinogen value, largest share first.
tri_data %>%
  count(carcinogen, name = "records") %>%
  mutate(percent = 100 * records / nrow(tri_data)) %>%
  select(-records) %>%
  arrange(desc(percent))
## # A tibble: 2 × 2
## carcinogen percent
## <chr> <dbl>
## 1 NO 71.6
## 2 YES 28.4
# Percentage of tri_data rows for each pbt value, largest share first.
tri_data %>%
  count(pbt, name = "records") %>%
  mutate(percent = 100 * records / nrow(tri_data)) %>%
  select(-records) %>%
  arrange(desc(percent))
## # A tibble: 2 × 2
## pbt percent
## <chr> <dbl>
## 1 NO 82.8
## 2 YES 17.2
# Percentage of tri_data rows for each pfas value, largest share first.
tri_data %>%
  count(pfas, name = "records") %>%
  mutate(percent = 100 * records / nrow(tri_data)) %>%
  select(-records) %>%
  arrange(desc(percent))
## # A tibble: 2 × 2
## pfas percent
## <chr> <dbl>
## 1 NO 100.
## 2 YES 0.0203
# Chemicals flagged carcinogen == "YES". No summary follows the group_by(),
# so the grouping only shows up in the printed header.
chemical_info %>%
  filter(carcinogen == "YES") %>%
  select(chemical, carcinogen) %>%
  group_by(chemical)
## # A tibble: 187 × 2
## # Groups: chemical [187]
## chemical carcinogen
## <chr> <chr>
## 1 1,1,1,2-Tetrachloroethane YES
## 2 1,1,2,2-Tetrachloroethane YES
## 3 1,1-Dimethylhydrazine YES
## 4 1,2,3-Trichloropropane YES
## 5 1,2-Butylene oxide YES
## 6 1,2-Dibromo-3-chloropropane YES
## 7 1,2-Dibromoethane YES
## 8 1,2-Dichloroethane YES
## 9 1,2-Dichloropropane YES
## 10 1,2-Diphenylhydrazine YES
## # ℹ 177 more rows
# Chemicals flagged carcinogen == "NO". No summary follows the group_by(),
# so the grouping only shows up in the printed header.
# NOTE(review): the rendered output reports 441 rows in 440 groups, so one
# chemical is listed twice. chemical_info is de-duplicated over more columns
# than the two kept here; distinct(chemical, carcinogen) would list each
# chemical once -- confirm whether the duplicate is wanted.
chemical_info %>%
  filter(carcinogen == "NO") %>%
  select(chemical, carcinogen) %>%
  group_by(chemical)
## # A tibble: 441 × 2
## # Groups: chemical [440]
## chemical carcinogen
## <chr> <chr>
## 1 1,1,1,2-Tetrachloro-2-fluoroethane (HCFC-121a) NO
## 2 1,1,1-Trichloroethane NO
## 3 1,1,2,2-Tetrachloro-1-fluoroethane (HCFC-121) NO
## 4 1,1,2,2-Tetrahydroperfluorodecyl acrylate NO
## 5 1,1,2,2-Tetrahydroperfluorododecyl acrylate NO
## 6 1,1,2,2-Tetrahydroperfluorohexadecyl acrylate NO
## 7 1,1,2,2-Tetrahydroperfluorotetradecyl acrylate NO
## 8 1,1,2-Trichloroethane NO
## 9 1,1-Dichloro-1,2,2,3,3-pentafluoropropane (HCFC-225cc) NO
## 10 1,1-Dichloro-1-fluoroethane (HCFC-141b) NO
## # ℹ 431 more rows
# Chemicals flagged pfas == "YES". No summary follows the group_by(), so the
# grouping only shows up in the printed header.
chemical_info %>%
  filter(pfas == "YES") %>%
  select(chemical, pfas) %>%
  group_by(chemical)
## # A tibble: 56 × 2
## # Groups: chemical [56]
## chemical pfas
## <chr> <chr>
## 1 1,1,2,2-Tetrahydroperfluorodecyl acrylate YES
## 2 1,1,2,2-Tetrahydroperfluorododecyl acrylate YES
## 3 1,1,2,2-Tetrahydroperfluorohexadecyl acrylate YES
## 4 1,1,2,2-Tetrahydroperfluorotetradecyl acrylate YES
## 5 1-Decanol, 3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,10-heptadecafluoro- YES
## 6 1-Octanesulfonamide, 1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,8-heptadecafluoro… YES
## 7 1-Octanesulfonamide, 1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,8-heptadecafluoro… YES
## 8 1-Octanesulfonamide, N-butyl-1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,8-heptade… YES
## 9 1-Propanaminium, 2-hydroxy-N,N,N-trimethyl-, 3-[(γ-ω-perfluoro-C6-20-a… YES
## 10 1-Propanaminium, 3-amino-N-(carboxymethyl)-N,N-dimethyl-, N-[2-[(γ-ω-p… YES
## # ℹ 46 more rows
# Chemicals flagged pfas == "NO". No summary follows the group_by(), so the
# grouping only shows up in the printed header.
# NOTE(review): the rendered output reports 572 rows in 571 groups, so one
# chemical is listed twice. chemical_info is de-duplicated over more columns
# than the two kept here; distinct(chemical, pfas) would list each chemical
# once -- confirm whether the duplicate is wanted.
chemical_info %>%
  filter(pfas == "NO") %>%
  select(chemical, pfas) %>%
  group_by(chemical)
## # A tibble: 572 × 2
## # Groups: chemical [571]
## chemical pfas
## <chr> <chr>
## 1 1,1,1,2-Tetrachloro-2-fluoroethane (HCFC-121a) NO
## 2 1,1,1,2-Tetrachloroethane NO
## 3 1,1,1-Trichloroethane NO
## 4 1,1,2,2-Tetrachloro-1-fluoroethane (HCFC-121) NO
## 5 1,1,2,2-Tetrachloroethane NO
## 6 1,1,2-Trichloroethane NO
## 7 1,1-Dichloro-1,2,2,3,3-pentafluoropropane (HCFC-225cc) NO
## 8 1,1-Dichloro-1-fluoroethane (HCFC-141b) NO
## 9 1,1-Dimethylhydrazine NO
## 10 1,2,3-Trichloropropane NO
## # ℹ 562 more rows
# Chemicals flagged pbt == "YES". No summary follows the group_by(), so the
# grouping only shows up in the printed header.
chemical_info %>%
  filter(pbt == "YES") %>%
  select(chemical, pbt) %>%
  group_by(chemical)
## # A tibble: 23 × 2
## # Groups: chemical [23]
## chemical pbt
## <chr> <chr>
## 1 Aldrin YES
## 2 Benzo[g,h,i]perylene YES
## 3 Chlordane YES
## 4 Dioxin and dioxin-like compounds YES
## 5 Heptachlor YES
## 6 Hexabromocyclododecane YES
## 7 Hexachlorobenzene YES
## 8 Isodrin YES
## 9 Lead YES
## 10 Lead And Lead Compounds YES
## # ℹ 13 more rows
# Chemicals flagged pbt == "NO". No summary follows the group_by(), so the
# grouping only shows up in the printed header.
# NOTE(review): the rendered output reports 605 rows in 604 groups, so one
# chemical is listed twice. chemical_info is de-duplicated over more columns
# than the two kept here; distinct(chemical, pbt) would list each chemical
# once -- confirm whether the duplicate is wanted.
chemical_info %>%
  filter(pbt == "NO") %>%
  select(chemical, pbt) %>%
  group_by(chemical)
## # A tibble: 605 × 2
## # Groups: chemical [604]
## chemical pbt
## <chr> <chr>
## 1 1,1,1,2-Tetrachloro-2-fluoroethane (HCFC-121a) NO
## 2 1,1,1,2-Tetrachloroethane NO
## 3 1,1,1-Trichloroethane NO
## 4 1,1,2,2-Tetrachloro-1-fluoroethane (HCFC-121) NO
## 5 1,1,2,2-Tetrachloroethane NO
## 6 1,1,2,2-Tetrahydroperfluorodecyl acrylate NO
## 7 1,1,2,2-Tetrahydroperfluorododecyl acrylate NO
## 8 1,1,2,2-Tetrahydroperfluorohexadecyl acrylate NO
## 9 1,1,2,2-Tetrahydroperfluorotetradecyl acrylate NO
## 10 1,1,2-Trichloroethane NO
## # ℹ 595 more rows
# Distinct combinations of year and the chemical classification flags.
# Built once and shared by all seven plots below; previously the identical
# select/distinct pipeline was repeated for every plot.
# NOTE(review): because of distinct(), each bar counts distinct flag
# combinations per year, not reporting records -- confirm this is intended.
chemical_flags_by_year <- tri_data %>%
  select(
    year, elemental_metal_included, clean_air_act_chemical, metal,
    metal_category, carcinogen, pbt, pfas
  ) %>%
  distinct()

# Horizontal bar chart of one flag column, faceted by year.
#
# flags: data frame containing `year` and the column named by `flag`.
# flag:  unquoted column name; mapped to x, color and fill. Embracing with
#        {{ }} forwards the column itself, so axis and legend titles still
#        show the column name.
# Returns the ggplot object (auto-printed when called at top level).
plot_flag_by_year <- function(flags, flag) {
  ggplot(flags, aes(x = {{ flag }}, color = {{ flag }}, fill = {{ flag }})) +
    geom_bar() +
    coord_flip() +
    facet_wrap(~year)
}

plot_flag_by_year(chemical_flags_by_year, elemental_metal_included)
plot_flag_by_year(chemical_flags_by_year, clean_air_act_chemical)
plot_flag_by_year(chemical_flags_by_year, metal)
plot_flag_by_year(chemical_flags_by_year, metal_category)
plot_flag_by_year(chemical_flags_by_year, carcinogen)
plot_flag_by_year(chemical_flags_by_year, pbt)
plot_flag_by_year(chemical_flags_by_year, pfas)
# Pie chart (stacked column in polar coordinates) of the share of tri_data
# rows in each metal_category, labelled with the share rounded to 2 places.
tri_data %>%
  count(metal_category, name = "records") %>%
  mutate(prop = records / nrow(tri_data)) %>%
  select(-records) %>%
  ggplot(aes(x = "", y = prop, fill = fct_inorder(metal_category))) +
  geom_col(color = "white", linewidth = .3) +
  # x and y come from the plot-level mapping; only the label is added here.
  geom_text(
    aes(label = round(prop, 2)),
    position = position_stack(vjust = 0.5)
  ) +
  # geom_label_repel(aes(x = "", y = prop, label = paste0(prop, "%")), size = 4.5, nudge_x = 1, show.legend = FALSE) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "PiYG")
Trends in pollution and on-site vs. off-site disposal. Question: what are the trends in pollution (chemical release) over the past 10 years?
# Keep the year plus the release, transfer and containment columns.
pollution_info <- tri_data %>%
  select(all_of(c(
    "year",
    "fugitive_air", "stack_air", "water",
    "underground", "underground_class_1", "underground_class_2_through_5",
    "landfills", "land_treatment", "surface_impoundment", "other_disposal",
    "on_site_release_total",
    "public_treatment_total_transfer",
    "off_site_release_total", "off_site_recycled_total",
    "off_site_energy_recovery_total", "off_site_treated_total",
    "total_transfer", "total_releases", "releases",
    "on_site_contained", "on_site_other",
    "off_site_contain", "off_site_other"
  )))
# Smoothed trend of fugitive_air against year (geom_smooth defaults).
ggplot(pollution_info, aes(year, fugitive_air)) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# Placeholder: a plot of pollution_info with no aesthetics and no layers yet.
ggplot(pollution_info)
Overview of the data and conclusions about pollution and chemical use trends.